--- title: '[RC17]Clustering' author: "Nina-Lydia Kazakou" date: "04/03/2022" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(SingleCellExperiment) library(Seurat) library(tidyverse) library(Matrix) library(scales) library(cowplot) library(RCurl) library(scDblFinder) library(Matrix) library(ggplot2) library(edgeR) library(dplyr) library(ggsci) library(here) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` ### Load object ```{r} RC17_norm <- readRDS(here("data", "Processed", "AllCells", "RC17_norm_srt.rds")) ``` # Clustering ### Some more QC Plots ```{r fig.height=6, fig.width=10} Idents (RC17_norm) <- "Sample" VlnPlot(RC17_norm, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40]) plot1 <- FeatureScatter(RC17_norm, feature1 = "nCount_RNA", feature2 = "percent.mt", cols = mycoloursP[3:40]) plot2 <- FeatureScatter(RC17_norm, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40]) plot1 + plot2 ``` ## Identify Variable Features: ```{r} RC17_norm <- FindVariableFeatures(RC17_norm, selection.method = "vst", nfeatures = 2000) ``` ## Identify the 10 most highly variable genes: ```{r message=FALSE, warning=FALSE} top10 <- head(VariableFeatures(RC17_norm), 10) plotA <- VariableFeaturePlot(RC17_norm, cols = mycoloursP[4:5]) plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE) plotB ``` ## Scale all genes (if there is a batch effect I could add vars to regress here): ```{r} all.genes <- rownames(RC17_norm) RC17_norm <- ScaleData(RC17_norm, features = all.genes) ``` ## Linear dimensional reduction: ```{r} RC17_norm <- RunPCA(RC17_norm, features = VariableFeatures(RC17_norm), npcs = 100) ``` ```{r} VizDimLoadings(RC17_norm, dims = 1:2, reduction = "pca") ``` ```{r} DimPlot(RC17_norm, reduction = "pca", cols= mycoloursP[22:50]) DimHeatmap(RC17_norm, dims = 1, cells = 500, balanced = TRUE) ``` ```{r fig.height=16, fig.width=10} DimHeatmap(RC17_norm, dims = 1:20, cells = 500, balanced = TRUE) ``` ```{r fig.height=6, fig.width=10} PCAPlot(RC17_norm, split.by = "orig.ident", cols= mycoloursP[22:50]) ``` ## Dimentionality Reduction: ```{r} ElbowPlot(RC17_norm) ``` ```{r} RC17_norm <- FindNeighbors(RC17_norm, dims = 1:18, verbose = FALSE) RC17_norm <- FindClusters(RC17_norm, resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), verbose = FALSE) head(RC17_norm@meta.data, 2) ``` Load original object: ```{r} RC17_norm <- readRDS(here("data", "Processed", "Original_Objects", "ScaterQC_ScranNorm_ScaleDataSeurat.rds")) ``` I have previously examined different resolutions for clustering these data and decided that *RNA_snn_res.0.8* works best, based both on approximation models and general cell-type markers found in literature. ```{r message=FALSE} Idents(object = RC17_norm) <- "RNA_snn_res.0.8" #assign cluster identity RC17_norm <- RunUMAP(RC17_norm, dims = 1:18, reduction = "pca") ``` ```{r} DimPlot(RC17_norm, reduction = "umap", label = TRUE, pt.size = 0.8, cols = mycoloursP) DimPlot(RC17_norm, reduction = "pca", label = F, cols = mycoloursP, pt.size = 1) ``` Now, I will check the contribution of each sample to each cluster, to see if clusters are made up solely by cells derived from ont timepoint: ```{r} IDsPerCluster <- as.data.frame(tapply(RC17_norm@meta.data$Sample, RC17_norm@meta.data$RNA_snn_res.0.8, function(x) length(unique(x)) )) names(IDsPerCluster) <- "NumberOfIDs" IDsPerCluster$RNA_snn_res.0.8 <- rownames(IDsPerCluster) IDsPerCluster$Cluster <- rownames(IDsPerCluster) head(IDsPerCluster,2) #write.csv(IDsPerCluster, here("outs", "Processed", "AllCells", "ClusterContribution_RNA_snn_res.0.8.csv")) ``` It seems like all clusters are made up by cells from all three timepoints. Later on, I will also check how many cells from each timepoint make up each cluster for my chosen resolution. ## Cell-cycle scoring ```{r warning=FALSE} RC17_norm <- CellCycleScoring(object = RC17_norm, g2m.features = cc.genes$g2m.genes,s.features = cc.genes$s.genes) head((RC17_norm@meta.data)) ``` ```{r fig.height=6, fig.width=12} VlnPlot(RC17_norm, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP) ``` Some more plots: ```{r Doublets} DimPlot(RC17_norm, reduction = "umap", label = F, group.by = "scDblFinder.class", cols = c(mycoloursP[25] , mycoloursP[26]), pt.size = 0.6) ``` ```{r Cell-Cycle} DimPlot(RC17_norm, reduction = "umap", label = F, group.by = "Phase", cols = mycoloursP[27:50], pt.size = 0.6) ``` ```{r fig.height=8, fig.width=15} DimPlot(RC17_norm, reduction = "umap", label = T, group.by = "RNA_snn_res.0.8", split.by = "Phase", cols = mycoloursP[27:50], pt.size = 0.6) ``` ## Cluster Annotation ```{r} Idents(RC17_norm) <- "RNA_snn_res.0.8" DefaultAssay(RC17_norm) <- "RNA" ``` ```{r Oligodendroglia} FeaturePlot(RC17_norm, reduction = "umap", features = c("OLIG2", "OLIG1", "OLIG3", "SOX10"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r OPCs, fig.height=12, fig.width=8} FeaturePlot(RC17_norm, features = c("PDGFRA", "CSPG4", "GPR17", "PTPRZ1", "PCDH15", "PTGDS", "BCAN", "CABLES1", "GFRA1", "LINC01965"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r COPs, fig.height=16, fig.width=8} FeaturePlot(RC17_norm, features = c("ETV1", "CHST9", "MYT1", "TENM2", "CAMK2A", "KCNQ5", "SEMA5B", "SYT1", "GPR17", "BMPER", "EPHB1", "ARHGAP24", "DOCK8"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r Oligodendrocytes, fig.height=14, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("MBP", "PLP1", "CNP", "MAG", "MOG", "BCAS1", "ZNF488", "GALC", "MOBP"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r Mature_Oligodendrocytes, fig.height=18, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("SNAP25", "CNTN1", "FRY", "PLXDC2", "DYSF", "PTPRM", "FOS", "VIM", "JUNB", "AFF3", "KANK4", "FSTL5", "SGCZ", "MDGA2"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r TF, fig.height=8, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("SOX4", "SOX11", "MLLT11", "ZNF711", "EZH2", "DACH2"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "red"), ncol = 2) ``` ```{r Radial_Glial, Neuroprogenitors, fig.height=8, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c( "PAX6", "HES1", "HES5", "SOX2", "CDH2", "VIM"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkorchid"), ncol = 2) ``` ```{r Astrocytes, fig.height=20, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("S100B", "SLC1A3", "EFNB3", "TFF3", "SPARCL1", "SPON1", "TGFB2", "GJA1", "AQP4", "GLUL", "SOX9", "NDRG2", "GFAP", "ALDH1A1", "APOE", "FGFR3"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkgreen"), ncol = 2) ``` ```{r Astrocytes_2, fig.height=28, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = ,c( "TENM2", "CCDC85A", "SLC38A1", "GALNT15", "BNC2", "CFAP299", "SPAG17", "DTHD1", "ADGB", "UAP1", "SPOCD1", "CLCF1", "SORCS1", "SAMHD1", "NRGN", "CAMK2A", "THY1", "ENC1", "SYT1", "CALM3", "ST18", "CTNNA3", "APLNR"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkgreen"), ncol = 2) ``` ```{r Pericytes, fig.height=12, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("PDGFRB", "COL1A1", "COL1A2", "ACTA2", "SPARC", "BGN", "S100A10"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkorange"), ncol = 2) ``` ```{r Neurons, fig.height=6, fig.width=8} FeaturePlot(RC17_norm, features = c("SNAP25", "STMN2", "RBFOX3", "GABRB2"),order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Inhibitory_Neurons, fig.height=24, fig.width=8} FeaturePlot(RC17_norm, features = c("LAMA3", "VIP", "TAC3", "RYR3", "TSHZ2", "RELN", "IL1RAPL2", "CXCL14", "EYA4", "FBXL7", "KIT", "MEIS2", "PBX3", "PLCL1", "MYO5B", "TRHDE", "PLCH1"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Ecitatory_Neurons} FeaturePlot(RC17_norm, features = c("SATB2", "SLC12A6", "FEZF2","RORB"),order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Glutaminergic_Neurons} FeaturePlot(RC17_norm, reduction = "umap", features = c("GLS", "GRIN2B", "SLC17A6"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Serotonergic_Neurons, fig.height=3, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("SLC6A4", "TPH1"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Neuronal_Receptors, fig.height=3, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("GABBR1", "SLC32A1"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r General_Neuronal_Markers, fig.height=18, fig.width=8} FeaturePlot(RC17_norm, reduction = "umap", features = c("DCX", "DLG4", "FOS", "FOXG1", "GPHN", "NCAM1", "NEFL", "NEUROD1", "RBFOX3", "RBFOX1", "MAP2", "SYP", "SYNPR", "TUBB3"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2) ``` ```{r Microglia} FeaturePlot(RC17_norm, reduction = "umap", features = c("CX3CR1", "CD68", "CD40"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "purple"), ncol = 2) ``` ```{r Interesting_post-mortem_Markers} FeaturePlot(RC17_norm, reduction = "umap", features = c("RBFOX1", "SPARC", "NELL1"), order = TRUE, min.cutoff = 'q10', label = TRUE, cols = c("lightgrey", "darkorchid3"), ncol = 2) ``` ## Assign ClusterID ```{r message=FALSE, warning=FALSE} RC17_norm$ClusterID <- RC17_norm$RNA_snn_res.0.8 current.ids <- c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23) new.ids <- c("Radial_Glia_1", "Radial_Glia/Pre-OPCs_1", "Astrocytes_1", "Astrocytes_2", "Oligodendroglia_1", "Unknown", "Radial_Glia_2", "Outer-Radial_Glia_1", "Outer-Radial_Glia_2", "Radial_Glia/Pre-OPCs_2", "Neurons_1", "Neurons_2", "Oligodendroglia_2", "Radial_Glia_3", "Oligodendroglia_3", "Neurons_3", "Neurons_3", "Unknown", "Oligodendroglia_4", "Oligodendroglia_5", "Oligodendroglia_6", "Unknown", "Pericytes", "Outer-Radial_Glia_3") RC17_norm@meta.data$ClusterID <- plyr::mapvalues(x = RC17_norm@meta.data$ClusterID, from = current.ids, to = new.ids) head(RC17_norm@meta.data, 2) ``` ```{r fig.height=8, fig.width=8} DimPlot(RC17_norm, reduction = "umap", pt.size = 0.8, group.by = "ClusterID", cols = mycoloursP[6:40], label = TRUE) & NoLegend() ``` Some more plots: ```{r Doublets_2, fig.height=6, fig.width=10} DimPlot(RC17_norm, reduction = "umap", label = TRUE, group.by = "ClusterID", split.by = "scDblFinder.class", cols = mycoloursP, pt.size = 0.6) & NoLegend() ``` ```{r Cell-Cycle_2, fig.height=7, fig.width=16} DimPlot(RC17_norm, reduction = "umap", label = TRUE, group.by = "ClusterID", split.by = "Phase", cols = mycoloursP, pt.size = 0.6) & NoLegend() ``` ```{r Timepoint, fig.height=7, fig.width=16} DimPlot(RC17_norm, reduction = "umap", label = TRUE, group.by = "ClusterID", split.by = "Sample", cols = mycoloursP, pt.size = 0.6) & NoLegend() ``` ```{r} DimPlot(RC17_norm, reduction = "umap", label = FALSE, group.by = "Sample", cols = mycoloursP[5:40], pt.size = 0.6) ``` ### Check how many cells from each timepoint make up each cluster for my chosen resolution. ```{r} n_cells <- FetchData(RC17_norm, vars = c("ClusterID", "orig.ident")) %>% dplyr::count(ClusterID, orig.ident) %>% tidyr::spread(ClusterID, n) head(n_cells, 5) #write.csv(n_cells, here("outs", "Processed", "AllCells", "no.ofCellsperCluster.csv")) ``` #### Clean-up metadata: ```{r} RC17_norm@meta.data$percent.mito <- NULL RC17_norm@meta.data$LibSize_high <- NULL RC17_norm@meta.data$LibSize_low <- NULL RC17_norm@meta.data$low_nFeatures <- NULL RC17_norm@meta.data$high_nFeatures <- NULL RC17_norm@meta.data$high_subsets_MitoPerc <- NULL RC17_norm@meta.data$ScaterQC <- NULL RC17_norm@meta.data$label <- NULL RC17_norm@meta.data$scDblFinder.sample <- NULL RC17_norm@meta.data$scDblFinder.cluster <- NULL RC17_norm@meta.data$scDblFinder.nearestClass <- NULL RC17_norm@meta.data$scDblFinder.difficulty <- NULL RC17_norm@meta.data$scDblFinder.cxds_score <- NULL RC17_norm@meta.data$scDblFinder.mostLikelyOrigin <- NULL RC17_norm@meta.data$scDblFinder.weighted <- NULL RC17_norm@meta.data$scDblFinder.ratio <- NULL RC17_norm@meta.data$scDblFinder.originAmbiguous <- NULL RC17_norm@meta.data$RNA_snn_res.0.5 <- NULL RC17_norm@meta.data$RNA_snn_res.0.6 <- NULL RC17_norm@meta.data$RNA_snn_res.0.7 <- NULL RC17_norm@meta.data$RNA_snn_res.1 <- NULL RC17_norm@meta.data$seurat_clusters <- NULL RC17_norm@meta.data$Celltype_res.0.8 <- NULL RC17_norm@meta.data$Cluster_id <- NULL RC17_norm@meta.data$dataset <- NULL RC17_norm@meta.data$outlier <- RC17_norm@meta.data$discard RC17_norm@meta.data$discard <- NULL RC17_norm@meta.data$Dataset <- "RC17_Monolayer_AllCells" ``` ```{r} #saveRDS(RC17_norm, here("data", "Processed", "AllCells", "RC17_AllCells_srt.rds")) ``` ```{r} sessionInfo() ```